-
Notifications
You must be signed in to change notification settings - Fork 15.3k
AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext #168410
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Petar Avramovic (petar-avramovic) Changes — Full diff: https://github.com/llvm/llvm-project/pull/168410.diff (3 files affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
return RALSrc;
+ // RALSrc = G_ANYEXT S16Src
+ // TruncSrc = G_AMDGPU_READANYLANE RALSrc
+ // Src = G_TRUNC TruncSrc
+ if (mi_match(Src, MRI,
+ m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+ return RALSrc;
+ }
+
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
// AextSrc = G_TRUNC TruncSrc
// Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11-True16-NEXT: ds_store_b16 v0, v1
; GFX11-True16-NEXT: s_endpgm
;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX12-True16-NEXT: s_wait_dscnt 0x0
-; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT: s_wait_alu 0xf1ff
-; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX12-True16-NEXT: ds_store_b16 v0, v1
; GFX12-True16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
|
|
@llvm/pr-subscribers-llvm-globalisel Author: Petar Avramovic (petar-avramovic) Changes — Full diff: https://github.com/llvm/llvm-project/pull/168410.diff (3 files affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
return RALSrc;
+ // RALSrc = G_ANYEXT S16Src
+ // TruncSrc = G_AMDGPU_READANYLANE RALSrc
+ // Src = G_TRUNC TruncSrc
+ if (mi_match(Src, MRI,
+ m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+ return RALSrc;
+ }
+
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
// AextSrc = G_TRUNC TruncSrc
// Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11-True16-NEXT: ds_store_b16 v0, v1
; GFX11-True16-NEXT: s_endpgm
;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX12-True16-NEXT: s_wait_dscnt 0x0
-; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT: s_wait_alu 0xf1ff
-; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX12-True16-NEXT: ds_store_b16 v0, v1
; GFX12-True16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
|
🐧 Linux x64 Test Results
|
3590a6e to
9e70882
Compare
vangthao95
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would we want a .mir test for this? Otherwise LGTM.

No description provided.